# Project setup. NOTE(review): a hard-coded absolute setwd() makes the script
# non-portable; consider running from the project root (or here::here()).
setwd("/Users/TARDIS/Documents/STUDIES/context_word_seg")
library(ProjectTemplate)
# Loads data, munging scripts, and helpers per the ProjectTemplate layout.
load.project()
######
# WL #
######
WL <- df_WL %>%
mutate_each(funs(ifelse(. > 0, 1, 0)), -utt, -orth, -phon) # for word list analysis only, make all contexts either 1 or 0 (smoothing over 1.5's from expand_windows)
# NOTE(review): mutate_each()/funs() are deprecated in current dplyr; the
# modern equivalent is mutate(across(-c(utt, orth, phon), ~ ifelse(.x > 0, 1, 0))).
# Left as-is to match the dplyr version this project was built against.
# Tally utterances per context, computing the column sums once
# (the original evaluated the same colSums(select(...)) expression twice).
WL_counts <- colSums(dplyr::select(WL, -utt, -orth, -phon))
WL_sum <- data.frame(context = names(WL_counts), n = WL_counts) %>%
  mutate(method = "word lists")
WL_sum %>%
  select(-method) %>%
  arrange(desc(n)) %>%
  kable()
| context | n |
|---|---|
| body_touch | 1530 |
| meal | 942 |
| fussing | 741 |
| bath | 664 |
| bed | 570 |
| play | 528 |
| diaper_dressing | 471 |
| media | 20 |
######
# HJ #
######
# Coder-judgment contexts: utterances per context, computing the column
# sums once instead of duplicating the colSums(select(...)) call.
HJ_counts <- colSums(dplyr::select(df_HJ_bin, -utt, -orth, -phon), na.rm = TRUE)
HJ_sum <- data.frame(context = names(HJ_counts), n = HJ_counts) %>%
  mutate(method = "coder judgments")
HJ_sum %>%
  select(-method) %>%
  arrange(desc(n)) %>%
  kable()
| context | n |
|---|---|
| playtime | 4780 |
| interaction | 4485 |
| fussing | 2706 |
| mealtime | 1832 |
| bathtime | 1230 |
| none | 941 |
| diaper change | 672 |
| dressing | 441 |
| sleep | 397 |
| housework | 291 |
| touching | 283 |
| taking pictures | 122 |
| hiccups | 80 |
| TV | 39 |
| outside | 36 |
| sick | 19 |
| coughing | 15 |
| stretching | 13 |
| sneezing | 12 |
| swimming | 12 |
| music | 10 |
| drooling | 4 |
| blowing bubbles | 2 |
| friends | 2 |
| pre-doctors visit | 1 |
######
# TM #
######
# LDA topic contexts: utterances per topic, computing the column sums once
# instead of duplicating the colSums(select(...)) call.
LDA_counts <- colSums(dplyr::select(df_LDA_bin, -utt, -orth, -phon), na.rm = TRUE)
LDA_sum <- data.frame(context = names(LDA_counts), n = LDA_counts) %>%
  mutate(method = "LDA")
LDA_sum %>%
  select(-method) %>%
  arrange(desc(n)) %>%
  kable()
| context | n |
|---|---|
| topic_11 | 2292 |
| topic_5 | 1887 |
| topic_3 | 1616 |
| topic_4 | 1544 |
| topic_2 | 1478 |
| topic_7 | 1251 |
| topic_6 | 1187 |
| topic_9 | 1176 |
| topic_8 | 1090 |
| topic_12 | 1020 |
| topic_1 | 736 |
| topic_10 | 613 |
# STM topic contexts: utterances per topic, computing the column sums once
# instead of duplicating the colSums(select(...)) call.
STM_counts <- colSums(dplyr::select(df_STM_bin, -utt, -orth, -phon), na.rm = TRUE)
STM_sum <- data.frame(context = names(STM_counts), n = STM_counts) %>%
  mutate(method = "STM")
STM_sum %>%
  select(-method) %>%
  arrange(desc(n)) %>%
  kable()
| context | n |
|---|---|
| topic_3 | 2174 |
| topic_10 | 1938 |
| topic_4 | 1775 |
| topic_7 | 1733 |
| topic_1 | 1575 |
| topic_2 | 1392 |
| topic_8 | 1066 |
| topic_6 | 930 |
| topic_9 | 845 |
| topic_5 | 840 |
| topic_12 | 548 |
| topic_11 | 540 |
# Combine the per-method tallies and plot utterances per context, one facet
# per method; the dashed line marks the total number of utterances in df.
all_sum <- rbind(WL_sum, HJ_sum, LDA_sum, STM_sum)
ggplot(all_sum, aes(x = reorder(as.factor(context), n), y = n)) +
  geom_bar(stat = 'identity', show.legend = FALSE) +
  geom_hline(aes(yintercept = nrow(df)), lty = 2) +
  facet_wrap(~ method, scales = "free_x", ncol = 1) +
  ggtitle("Number of utterances in each context\ndashed line shows total corpus size")
######
# WL #
######
# Count how many contexts each utterance was tagged with (row sums over the 0/1 columns).
WL$N.contexts <- rowSums(dplyr::select(WL, -utt, -orth, -phon))
table(WL$N.contexts)
##
## 0 1 2 3
## 8334 3924 693 52
# Resolve each utterance to a single context label; the output below shows
# "ambiguous" (multiple tags) and "no context tag" (zero tags) levels —
# extract_contexts() is a project helper, see lib/ for its exact rules.
WL <- extract_contexts(WL)
summary(WL$context) # the number of utterances in each context
## ambiguous bath bed body_touch
## 745 440 377 1147
## diaper_dressing fussing meal media
## 295 557 706 12
## no context tag play
## 8334 390
length(which(WL$N.contexts > 2)) / length(WL$N.contexts) # proportion of corpus with > 2 contexts
## [1] 0.003999077
length(which(WL$N.contexts == 0)) / length(WL$N.contexts) # proportion of corpus with 0 contexts
## [1] 0.640929
# Among utterances with at least one tag, what proportion have > 2 tags?
length(which(WL$N.contexts > 2)) / length(which(WL$N.contexts > 0))
## [1] 0.01113729
# Among utterances with at least one tag, what proportion have > 1 tag?
length(which(WL$N.contexts > 1)) / length(which(WL$N.contexts > 0))
## [1] 0.1595631
######
# HJ #
######
# Coder judgments: contexts tagged per utterance (NAs treated as untagged).
df_HJ_bin$N.contexts <- rowSums(dplyr::select(df_HJ_bin, -utt, -orth, -phon), na.rm = TRUE)
table(df_HJ_bin$N.contexts)
##
## 0 1 2 3
## 501 6911 5259 332
# Resolve each utterance to a single coder-judgment context label
# ("ambiguous" / "no context tag" for multi- and zero-tag utterances).
df_HJ_bin <- extract_contexts(df_HJ_bin)
summary(df_HJ_bin$context) # the number of utterances in each context
## ambiguous bathtime diaper change dressing
## 5591 877 292 148
## fussing hiccups housework interaction
## 794 18 99 1335
## mealtime no context tag none outside
## 772 501 171 4
## playtime sleep sneezing taking pictures
## 2216 124 2 14
## touching TV
## 26 19
length(which(df_HJ_bin$N.contexts > 2)) / length(df_HJ_bin$N.contexts) # proportion of corpus with > 2 contexts
## [1] 0.02553257
length(which(df_HJ_bin$N.contexts == 0)) / length(df_HJ_bin$N.contexts) # proportion of corpus with 0 contexts
## [1] 0.03852957
######
# TM #
######
# LDA: contexts (topics) tagged per utterance.
df_LDA_bin$N.contexts <- rowSums(dplyr::select(df_LDA_bin, -utt, -orth, -phon), na.rm = TRUE)
table(df_LDA_bin$N.contexts)
##
## 0 1 2 3
## 31 9100 3350 30
# Resolve each utterance to a single LDA topic label.
df_LDA_bin <- extract_contexts(df_LDA_bin)
summary(df_LDA_bin$context) # the number of utterances in each context
## ambiguous no context tag topic_1 topic_10 topic_11
## 3380 31 286 300 1440
## topic_12 topic_2 topic_3 topic_4 topic_5
## 570 968 610 1065 1137
## topic_6 topic_7 topic_8 topic_9
## 679 837 758 450
length(which(df_LDA_bin$N.contexts > 2)) / length(df_LDA_bin$N.contexts) # proportion of corpus with > 2 contexts
## [1] 0.00239789
length(which(df_LDA_bin$N.contexts == 0)) / length(df_LDA_bin$N.contexts) # proportion of corpus with 0 contexts
## [1] 0.00247782
# STM: same per-utterance context counts as above.
df_STM_bin$N.contexts <- rowSums(dplyr::select(df_STM_bin, -utt, -orth, -phon), na.rm = TRUE)
table(df_STM_bin$N.contexts)
##
## 0 1 2 3
## 213 9300 2938 60
# Resolve each utterance to a single STM topic label.
df_STM_bin <- extract_contexts(df_STM_bin)
summary(df_STM_bin$context) # the number of utterances in each context
## ambiguous no context tag topic_1 topic_10 topic_11
## 2998 213 621 936 390
## topic_12 topic_2 topic_3 topic_4 topic_5
## 428 750 1432 1178 540
## topic_6 topic_7 topic_8 topic_9
## 540 1256 766 463
length(which(df_STM_bin$N.contexts > 2)) / length(df_STM_bin$N.contexts) # proportion of corpus with > 2 contexts
## [1] 0.00479578
length(which(df_STM_bin$N.contexts == 0)) / length(df_STM_bin$N.contexts) # proportion of corpus with 0 contexts
## [1] 0.01702502
###########
# OVERALL #
###########
# Per-utterance context assignments for each method, in a common shape:
# utterance id, number of contexts tagged, and the resolved context label.
wl_contexts <- WL %>%
  dplyr::select(utt, N.contexts, context) %>%
  mutate(method = "word list")
hj_contexts <- df_HJ_bin %>%
  dplyr::select(utt, N.contexts, context) %>%
  mutate(method = "coder judgments")
lda_contexts <- df_LDA_bin %>%
  dplyr::select(utt, N.contexts, context) %>%
  mutate(method = "LDA")
stm_contexts <- df_STM_bin %>%
  dplyr::select(utt, N.contexts, context) %>%
  mutate(method = "STM")
# Stacked bars: how many utterances received 0/1/2/3 context tags per method.
all_methods_counts <- rbind(wl_contexts, hj_contexts, lda_contexts, stm_contexts) %>%
  count(method, N.contexts)
ggplot(all_methods_counts, aes(x = method, y = n, fill = as.factor(N.contexts))) +
  geom_bar(stat = "identity") +
  labs(title = "Number of contexts tagged per utterance", y="Number of utterances", x = "Context defined by") +
  scale_fill_discrete(name="Number of contexts")
# Re-derive the coder-judgment context counts with the "none" tag excluded.
df_HJ_bin_no_none <- df_HJ_bin %>%
  dplyr::select(-none, -N.contexts, -context)
df_HJ_bin_no_none$N.contexts <- rowSums(dplyr::select(df_HJ_bin_no_none, -utt, -orth, -phon), na.rm = TRUE)
df_HJ_bin_no_none <- extract_contexts(df_HJ_bin_no_none)
hj_no_none <- df_HJ_bin_no_none %>%
  dplyr::select(utt, N.contexts, context) %>%
  mutate(method = "coder judgments")
# Same stacked-bar plot as above, but with "none" removed for the coders.
all_methods_counts_no_none <- rbind(wl_contexts, hj_no_none, lda_contexts, stm_contexts) %>%
  count(method, N.contexts)
ggplot(all_methods_counts_no_none, aes(x = method, y = n, fill = as.factor(N.contexts))) +
  geom_bar(stat = "identity") +
  labs(title = "Number of contexts tagged per utterance,\nnot including 'none' for coder judgments", y="Number of utterances", x = "Context defined by") +
  scale_fill_discrete(name="Number of contexts")
# Tag-count distribution with the "none" code included...
table(hj_contexts$N.contexts)
##
## 0 1 2 3
## 501 6911 5259 332
# ...and with "none" removed: more 0- and 1-tag utterances, fewer multi-tag.
table(hj_no_none$N.contexts)
##
## 0 1 2 3
## 672 7429 4651 251
all_methods_context_count <- rbind(wl_contexts, hj_contexts, lda_contexts, stm_contexts) %>%
  count(method, context)
# Stack these levels at the front of the factor; successive relevel() calls
# leave the final order as: ambiguous, no context tag, none, <the rest>.
for (lvl in c("none", "no context tag", "ambiguous")) {
  all_methods_context_count$context <- relevel(all_methods_context_count$context, lvl)
}
# Color only the three special levels; grey out all real contexts.
ggplot(all_methods_context_count, aes(y = n, x = method, fill = context)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c("#0072B2", "#D55E00", "#E69F00",
                               rep("#999999", length(levels(all_methods_context_count$context)) - 3)))
Printing context files for sharing with CF:
# Long table of every (utterance, context, method) triple; the utterance id
# (e.g. "ab12.cha_345") is split into child, age, and utterance number.
all <- rbind(wl_contexts, hj_no_none, lda_contexts, stm_contexts) %>%
  dplyr::select(utt, context, method) %>%
  tidyr::extract(utt, into = c("child", "age", "utt.num"),
                 regex = "^([[:alpha:]]{2})([[:digit:]]{2})[.]cha_([[:digit:]]+)$") %>%
  dplyr::mutate(utt.num = as.numeric(utt.num))
# Write one wide CSV per method (rows = child/age, columns = utterance number),
# replacing four copy-pasted pipelines with a single helper.
# NOTE(review): tidyr::spread() is superseded by pivot_wider(); kept here to
# preserve the exact column order of the original output.
write_context_file <- function(method_name, file_name) {
  all %>%
    dplyr::filter(method == method_name) %>%
    tidyr::spread(key = utt.num, value = context) %>%
    dplyr::select(-method) %>%
    write.csv(file.path("/Users/TARDIS/Dropbox/2_RoseM_TP/context_files", file_name),
              row.names = FALSE)
}
write_context_file("word list", "contexts_file_WL.csv")
write_context_file("coder judgments", "contexts_file_HJ.csv")
write_context_file("LDA", "contexts_file_LDA.csv")
write_context_file("STM", "contexts_file_STM.csv")
Dropping “none” codes from HJ.
# Canonical per-method data frames used downstream ("none" dropped for HJ).
WL <- WL # no-op; kept so all four methods get an explicit short name here
HJ <- df_HJ_bin_no_none
LDA <- df_LDA_bin
STM <- df_STM_bin
The word list and coder definitions of context naturally produce a skewed distribution of activities, whereas the topic modeling approaches discover a more uniform distribution of activities. To the extent that the distribution of activities is naturally skewed (e.g., a few activities happen very often and many others happen rarely), topic modeling approaches to identifying activity context may distort reality.
# Flatten the word-list keyword sets into one long data frame (one row per
# keyword x context). Building the rows with lapply() + do.call(rbind, ...)
# avoids growing a data frame inside a loop (quadratic copying).
contexts <- names(WL_contexts)
WL_context_data <- do.call(rbind, lapply(contexts, function(k) {
  data.frame(word = WL_contexts[[k]], context = k, stringsAsFactors = FALSE)
}))
# Single token stream over the whole corpus' orthographic transcripts.
orth_stream <- paste(df$orth, collapse = " ")
# flag bigrams from WL keywords in orth stream, so they don't get separated
for (w_bar in grep(x = WL_context_data$word, pattern = "_", value = TRUE)) {
  w_space <- gsub(x = w_bar, pattern = "_", replacement = " ")
  # NOTE(review): w_space is interpreted as a regex; fine for plain words but
  # would misbehave if a keyword contained regex metacharacters — confirm,
  # or add fixed = TRUE if literal matching is intended.
  orth_stream <- gsub(x = orth_stream, pattern = w_space, replacement = w_bar)
}
orth_stream <- strsplit(orth_stream, split = " ")[[1]]
orth_stream <- orth_stream[orth_stream != ""] # drop empties from repeated spaces
# Corpus frequency of each token.
orth_data <- data.frame(word = orth_stream, stringsAsFactors = FALSE) %>%
  count(word)
# Attach corpus frequency to each keyword (n is NA if the keyword never occurs).
WL_context_data <- left_join(WL_context_data, orth_data, by = "word") %>%
  arrange(context, n)
# Per-context summary of keyword frequencies: total tokens, mean keyword
# frequency, the highest single-keyword count, and which keyword it was.
WL_context_data %>%
  group_by(context) %>%
  summarise(
    total = sum(n, na.rm = TRUE),
    mean.freq = mean(n, na.rm = TRUE),
    highest = max(n, na.rm = TRUE),
    which.highest = word[which.max(n)]
  ) %>%
  kable()
| context | total | mean.freq | highest | which.highest |
|---|---|---|---|---|
| bath | 287 | 15.105263 | 47 | bath |
| bed | 150 | 13.636364 | 43 | tired |
| body_touch | 550 | 20.370370 | 123 | tickle |
| diaper_dressing | 116 | 6.444444 | 50 | nappie |
| fussing | 216 | 19.636364 | 56 | ssh |
| meal | 303 | 8.416667 | 27 | eat |
| media | 4 | 2.000000 | 3 | television |
| play | 136 | 7.157895 | 65 | play |
# Build 0/1 indicators for "coders said none" and "word list found nothing",
# to test whether the two methods agree on context-less utterances.
df_HJ_none <- df_HJ_bin
# 1 if coders tagged "none", 0 otherwise, NA if missing.
# (For strictly 0/1/NA input this nested ifelse is an identity copy —
# presumably kept for explicitness; confirm `none` is binary.)
df_HJ_none$HJ_none <- ifelse(df_HJ_none$none==1, 1,
ifelse(df_HJ_none$none==0, 0, NA))
df_HJ_none <- dplyr::select(df_HJ_none, utt, HJ_none)
# 1 if the word-list method assigned zero contexts to the utterance.
df_WL_0 <- WL
df_WL_0$WL_0 <- ifelse(df_WL_0$N.contexts == 0, 1,
ifelse(df_WL_0$N.contexts > 0, 0, NA))
df_WL_0 <- dplyr::select(df_WL_0, utt, WL_0)
# match == 1 only when BOTH indicators are 1 (NA if either is NA).
match <- full_join(df_HJ_none, df_WL_0, by="utt") %>%
mutate(match = HJ_none + WL_0)
match$match <- ifelse(match$match == 2, 1,
ifelse(match$match < 2, 0, NA))
# Sanity check: all three tables cover the same 13003 utterances.
nrow(df_WL_0); nrow(df_HJ_none); nrow(match)
## [1] 13003
## [1] 13003
## [1] 13003
# Cross-tabulate the two indicators (rows with NA are dropped by xtabs).
tab <- xtabs( ~ WL_0 + HJ_none, data = match)
addmargins(tab)
## HJ_none
## WL_0 0 1 Sum
## 0 4395 188 4583
## 1 7406 753 8159
## Sum 11801 941 12742
summary(tab)
## Call: xtabs(formula = ~WL_0 + HJ_none, data = match)
## Number of cases in table: 12742
## Number of factors: 2
## Test for independence of all factors:
## Chisq = 112.78, df = 1, p-value = 2.41e-26
# Visualize the association and report effect sizes (phi, Cramer's V);
# mosaic() and assocstats() presumably come from vcd — confirm.
mosaic(tab)
assocplot(tab)
assocstats(tab)
## X^2 df P(> X^2)
## Likelihood Ratio 122.91 1 0
## Pearson 112.78 1 0
##
## Phi-Coefficient : 0.094
## Contingency Coeff.: 0.094
## Cramer's V : 0.094
What percent of the “none”-context utterances in the HJ method have zero contexts in the WL method?
# Agreement rate conditional on the HJ "none" indicator.
sum(match$match, na.rm=TRUE) / sum(match$HJ_none, na.rm=TRUE)
## [1] 0.8002125
What percent of the zero-context utterances in the WL method are tagged “none” in the HJ method?
# Agreement rate conditional on the WL zero-context indicator.
sum(match$match, na.rm=TRUE) / sum(match$WL_0, na.rm=TRUE)
## [1] 0.09035277
What words are associated with each LDA topic? LDAvis: http://bl.ocks.org/rosemm/raw/a7b1ac43ffe3b49229ed5e866762613f/
# http://cpsievert.github.io/LDAvis/reviews/reviews.html
# Prepare LDAvis inputs from the fitted lda model.
alpha <- 0.1 # Dirichlet hyperparameter, from the lda package demo
eta <- 0.1 # Dirichlet hyperparameter, from the lda package demo
# Document-topic proportions (theta) and topic-term distributions (phi),
# smoothed by the hyperparameters and normalized to sum to 1.
theta <- t(apply(lda$document_sums + alpha, 2, function(col) col / sum(col)))
phi <- t(apply(t(lda$topics) + eta, 2, function(col) col / sum(col)))
D <- length(TM_doc_prep_out$documents) # number of documents
W <- length(TM_doc_prep_out$vocab) # number of terms in the vocab
doc.length <- document.lengths(TM_doc_prep_out$documents) # number of tokens per document
N <- sum(doc.length) # total number of tokens in the data
term.frequency <- word.counts(TM_doc_prep_out$documents, vocab = TM_doc_prep_out$vocab)
# Bundle the pieces once and reuse the same list for createJSON(),
# instead of repeating the argument list verbatim.
lda_data <- list(phi = phi,
                 theta = theta,
                 doc.length = doc.length,
                 vocab = TM_doc_prep_out$vocab,
                 term.frequency = as.integer(term.frequency))
json <- do.call(createJSON, lda_data)
serVis(json, as.gist = TRUE)
Wordles, showing the frequencies of words in the utterances assigned to each topic.
# One frequency word cloud per LDA topic (topic_1 through topic_12).
for (topic_col in paste0("topic_", seq_len(12))) {
  cloud_from_df(df_LDA_bin, topic_col)
}
What words are associated with each STM topic?
summary(stm)
## A topic model with 12 topics, 427 documents and a 531 word dictionary.
## Topic Words:
## Topic 1: side, lost, dear, round, ha, said, teddi
## Topic 2: bop, hello, monkey, give, hi, thing, say
## Topic 3: shh, hey, shake, rabbit, els, chou, light
## Topic 4: door, lunch, minut, sit, lucki, sweetheart, watch
## Topic 5: tickl, toe, feet, din, tick, tum, boy
## Topic 6: away, one, nappi, will, wee, sorri, milk
## Topic 7: hair, water, bad, fun, minut, deari, drink
## Topic 8: bum, bath, splash, swim, shake, dad, mum
## Topic 9: boo, bye, ah, clever, hide, see, tea
## Topic 10: hand, see, naughti, fed, can, bite, look
## Topic 11: juic, whether, duck, quack, miss, leav, madam
## Topic 12: dear, got, oh, hello, shame
##
## Covariate Words:
## Group cr: grumpi, gut, gorgeous, hmm, full, fast, shall
## Group gi: jumper, throat, without, keep, christoph, bash, reach
## Group gl: precious, handi, snooz, temper, mat, slept, happi
## Group la: hannah, busi, usual, nosh, prove, mummi, whose
## Group st: mobil, oven, cradl, appl, pussycat, cupsi, shoulder
##
## Topic-Covariate Interactions:
## Topic 1, Group cr: pretti, forget, nail, whee, let, dress, tear
## Topic 1, Group gi: clap, hand, hiccough, might, hah, christoph, left
## Topic 1, Group gl: struggl, walk, lost, aw, danc, punch, chin
## Topic 1, Group la: dub, bib, dear, bath, someth, hurt, whee
## Topic 1, Group st: hmm, stuck, okay, push, myron, kick, camera
##
## Topic 2, Group cr: ticki, shake, excit, bore, gillian, chou, camera
## Topic 2, Group gi: oop, us, stuff, bash, christoph, hello, friend
## Topic 2, Group gl: hmm
## Topic 2, Group la: pretti, skirt, seen, sunshin, kick, hello, bop
## Topic 2, Group st: foot, attent, grab, must, nose, matter, play
##
## Topic 3, Group cr: shush, matter, pleas, dolli, gillian, ah, hey
## Topic 3, Group gi: littl, re, wind, kick, shake, lad, chou
## Topic 3, Group gl: bop, rest, talk, oop, tri, bore, bib
## Topic 3, Group la: polli, near, thank, oop, bubbl, treasur, ho
## Topic 3, Group st: food, swing, smile, myron, suppos, move, ssh
##
## Topic 4, Group cr: girl, love, nose, bubbl, littl, bless
## Topic 4, Group gi: gonna, lad, ticklish, clean, never, mum, hous
## Topic 4, Group gl: first, know, ssh, stuff, wet, will, happen
## Topic 4, Group la: iron, arm, downstair, cuddl, god, mm, quiet
## Topic 4, Group st: daddi, goin, readi, burp, wind, ya, wash
##
## Topic 5, Group cr: belli, soft, wee, stori, din, nose, shout
## Topic 5, Group gi: clever, parrot, yum, complain, sack, daisi, terribl
## Topic 5, Group gl: find, tum, old, hah, dinner, thumb, lie
## Topic 5, Group la: excit, three, beebo, two, tum, get, one
## Topic 5, Group st: feet, toe, boy
##
## Topic 6, Group cr: sleepi, huh, head, shout, dolli, eye, chair
## Topic 6, Group gi: burp, suck, vest, carri, roll, bib, finger
## Topic 6, Group gl: hide, astra, three, ah, daisi, hiccup, chang
## Topic 6, Group la: sunshin, clean, wee, straight, nappi, morn, one
## Topic 6, Group st: charl, bib, ever, gonna, forget, tri, whee
##
## Topic 7, Group cr: gonna, tell, cross, patch, shout, madam, stori
## Topic 7, Group gi: fatti, relax, grab, ow, walk, lift, dad
## Topic 7, Group gl: petal, tale, pop, pet, bath, wash, dirti
## Topic 7, Group la: dub, stori, tell, daddi, rub, mum, wash
## Topic 7, Group st: lulu, hot, bottl, wide, juic, garden, first
##
## Topic 8, Group cr: bath, bum
## Topic 8, Group gi: arm, struggl, naughti, stick, nail, wash, let
## Topic 8, Group gl: stori, wee, bubbl, next, ha, aw, mess
## Topic 8, Group la: will, splish, splosh, tonight, whee, air, mm
## Topic 8, Group st: say, anyth, cuddl, myron, shake, milk, hello
##
## Topic 9, Group cr: oop, cross, patch, shush, sack, re, babi
## Topic 9, Group gi: parrot, everyth, best, mouth, cup, teeth, hair
## Topic 9, Group gl: astra, myron, anoth, tomorrow, yeah, clean, one
## Topic 9, Group la: lambchop, sleepi, watch, chou, love, girl, tri
## Topic 9, Group st: hide, stand, ah, littl, bye
##
## Topic 10, Group cr: toe, stretch, game, play, can, tongu, goin
## Topic 10, Group gi: clap, splash, shout, push, stand, soon, ya
## Topic 10, Group gl: pretti, pram, can, bear, afternoon, play, tell
## Topic 10, Group la: lambchop, chou, friend, littl, smile, hmm, daddi
## Topic 10, Group st: polli, lewi, pastri, joseph, rain, though, thank
##
## Topic 11, Group cr: attent, ya, bib, matter, think, minut, alright
## Topic 11, Group gi: duck, quack, bath, will, look, left, minut
## Topic 11, Group gl: get, sunshin, dress, tongu, arm, stori, never
## Topic 11, Group la: paddi, donald, yum, quack, came, point, silli
## Topic 11, Group st: meringu, dub, pie, lemon, rub, joseph, cupboard
##
## Topic 12, Group cr: shame, burp, fatti, hicki, mind, poor, never
## Topic 12, Group gi: nose, stretch, daddi, size, christoph
## Topic 12, Group gl: smile, daddi, dad, bless, tis, stay, girli
## Topic 12, Group la: ssh, trea, pie, alright, armi, darl, funni
## Topic 12, Group st:
##
# http://cpsievert.github.io/LDAvis/reviews/reviews.html
# Interactive LDAvis view of the STM model, published as a gist.
toLDAvis(stm, TM_doc_prep_out$documents, as.gist = TRUE) # This function does not yet allow content covariates.
Wordles, showing the probability of each word given the topic.
# Word clouds of P(word | topic) from the fitted STM, one per topic.
for (k in seq_len(12)) {
  cloud(stm, topic = k)
}
Wordles, showing the frequencies of words in the utterances assigned to each topic.
# One frequency word cloud per STM topic (topic_1 through topic_12).
for (topic_col in paste0("topic_", seq_len(12))) {
  cloud_from_df(df_STM_bin, topic_col)
}